LIBRERÍAS A UTILIZAR EN EL PROBLEMA
library(naniar)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(fdth)
##
## Attaching package: 'fdth'
## The following objects are masked from 'package:stats':
##
## sd, var
library(agricolae)
library(UsingR)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## Loading required package: HistData
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:plotly':
##
## subplot
## The following objects are masked from 'package:base':
##
## format.pval, units
##
## Attaching package: 'UsingR'
## The following object is masked from 'package:survival':
##
## cancer
# Load the diabetes data set. Fields are separated by commas, but decimal
# values are written with a decimal comma (e.g. "3,9", "22,5", "0,84"), so
# `dec = ","` is needed for chol_hdl_ratio, bmi and waist_hip_ratio to be
# parsed as numeric columns instead of character strings.
datas <- read.csv(
  "diabetes.csv",
  header = TRUE, sep = ",", dec = ",", fill = FALSE
)
# Preview the first rows to check the import.
head(datas)
## patient_number cholesterol glucose hdl_chol chol_hdl_ratio age gender height
## 1 1 193 77 49 3,9 19 female 61
## 2 2 146 79 41 3,6 19 female 60
## 3 3 217 75 54 4 20 female 67
## 4 4 226 97 70 3,2 20 female 64
## 5 5 164 91 67 2,4 20 female 70
## 6 6 170 69 64 2,7 20 female 64
## weight bmi systolic_bp diastolic_bp waist hip waist_hip_ratio diabetes
## 1 119 22,5 118 70 32 38 0,84 No diabetes
## 2 135 26,4 108 58 33 40 0,83 No diabetes
## 3 187 29,3 110 72 40 45 0,89 No diabetes
## 4 114 19,6 122 64 31 39 0,79 No diabetes
## 5 141 20,2 122 86 32 39 0,82 No diabetes
## 6 161 27,6 108 70 37 40 0,93 No diabetes
# Inspect the last rows of the data set to check the end of the import.
tail(datas)
## patient_number cholesterol glucose hdl_chol chol_hdl_ratio age gender
## 385 385 255 112 34 7,5 82 male
## 386 386 227 105 44 5,2 83 female
## 387 387 226 279 52 4,3 84 female
## 388 388 301 90 118 2,6 89 female
## 389 389 232 184 114 2 91 female
## 390 390 165 94 69 2,4 92 female
## height weight bmi systolic_bp diastolic_bp waist hip waist_hip_ratio
## 385 66 163 26,3 179 89 37 43 0,86
## 386 59 125 25,2 150 90 35 40 0,88
## 387 60 192 37,5 144 88 41 48 0,85
## 388 61 115 21,7 218 90 31 41 0,76
## 389 61 127 24 170 82 35 38 0,92
## 390 62 217 39,7 160 82 51 51 1
## diabetes
## 385 No diabetes
## 386 No diabetes
## 387 Diabetes
## 388 No diabetes
## 389 Diabetes
## 390 No diabetes
# Put the columns of `datas` on the search path; the rest of the script refers
# to `glucose` by its bare name.
# NOTE(review): attach() makes name clashes easy to miss; consider replacing it
# with explicit `datas$glucose` references throughout.
attach(datas)
# Percentage of missing values in the whole data set.
pct_miss(datas)
## [1] 0
Un análisis inicial muestra que no existen datos faltantes, por lo que no hace falta omitir ni imputar observaciones.
TABLA DE FRECUENCIAS SIMPLE
# Simple frequency table: one column per distinct glucose value
tab_gluc <- table(glucose)            # absolute frequencies
sum(tab_gluc)
## [1] 390
tab_gluc_abs <- prop.table(tab_gluc)  # relative frequencies (proportions)
sum(tab_gluc_abs)
## [1] 1
# Append a totals entry to each frequency vector.
tab_gluc <- c(tab_gluc, sum(tab_gluc))
tab_gluc_abs <- c(tab_gluc_abs, sum(tab_gluc_abs))
# Column labels: the observed glucose values plus a label for the totals.
nums <- replace(names(tab_gluc), length(tab_gluc), "Totales")
# Two-row matrix: counts ("fi") in the first row, proportions ("pi") in the
# second one.
tab_freq_gluc <- matrix(
  c(tab_gluc, tab_gluc_abs),
  nrow = 2, ncol = length(nums), byrow = TRUE,
  dimnames = list(c("fi", "pi"), nums)
)
length(tab_freq_gluc)
## [1] 234
tab_freq_gluc
## 48 52 54 56 57 58
## fi 1.000000000 1.000000000 1.000000000 2.000000000 1.000000000 1.000000000
## pi 0.002564103 0.002564103 0.002564103 0.005128205 0.002564103 0.002564103
## 59 60 62 64 65 66
## fi 1.000000000 1.000000000 1.000000000 2.000000000 2.000000000 1.000000000
## pi 0.002564103 0.002564103 0.002564103 0.005128205 0.005128205 0.002564103
## 67 68 69 70 71 72
## fi 4.00000000 2.000000000 4.00000000 3.000000000 6.00000000 1.000000000
## pi 0.01025641 0.005128205 0.01025641 0.007692308 0.01538462 0.002564103
## 73 74 75 76 77 78
## fi 2.000000000 10.00000000 8.00000000 10.00000000 11.00000000 5.00000000
## pi 0.005128205 0.02564103 0.02051282 0.02564103 0.02820513 0.01282051
## 79 80 81 82 83 84
## fi 7.00000000 6.00000000 15.00000000 10.00000000 11.00000000 12.00000000
## pi 0.01794872 0.01538462 0.03846154 0.02564103 0.02820513 0.03076923
## 85 86 87 88 89 90
## fi 18.00000000 7.00000000 12.00000000 9.00000000 6.00000000 10.00000000
## pi 0.04615385 0.01794872 0.03076923 0.02307692 0.01538462 0.02564103
## 91 92 93 94 95 96
## fi 10.00000000 14.00000000 3.000000000 8.00000000 7.00000000 2.000000000
## pi 0.02564103 0.03589744 0.007692308 0.02051282 0.01794872 0.005128205
## 97 98 99 100 101 102
## fi 7.00000000 3.000000000 2.000000000 5.00000000 8.00000000 3.000000000
## pi 0.01794872 0.007692308 0.005128205 0.01282051 0.02051282 0.007692308
## 103 104 105 106 107 108
## fi 3.000000000 2.000000000 5.00000000 5.00000000 1.000000000 2.000000000
## pi 0.007692308 0.005128205 0.01282051 0.01282051 0.002564103 0.005128205
## 109 110 111 112 113 115
## fi 3.000000000 2.000000000 3.000000000 5.00000000 2.000000000 3.000000000
## pi 0.007692308 0.005128205 0.007692308 0.01282051 0.005128205 0.007692308
## 117 118 119 120 121 122
## fi 1.000000000 3.000000000 3.000000000 5.00000000 2.000000000 2.000000000
## pi 0.002564103 0.007692308 0.007692308 0.01282051 0.005128205 0.005128205
## 124 125 126 128 130 131
## fi 1.000000000 1.000000000 2.000000000 2.000000000 2.000000000 1.000000000
## pi 0.002564103 0.002564103 0.005128205 0.005128205 0.005128205 0.002564103
## 133 138 145 153 155 161
## fi 1.000000000 1.000000000 1.000000000 1.000000000 3.000000000 1.000000000
## pi 0.002564103 0.002564103 0.002564103 0.002564103 0.007692308 0.002564103
## 171 172 173 174 176 177
## fi 1.000000000 1.000000000 3.000000000 1.000000000 1.000000000 1.000000000
## pi 0.002564103 0.002564103 0.007692308 0.002564103 0.002564103 0.002564103
## 182 184 185 187 193 196
## fi 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## pi 0.002564103 0.002564103 0.002564103 0.002564103 0.002564103 0.002564103
## 197 203 206 223 225 228
## fi 2.000000000 1.000000000 3.000000000 1.000000000 2.000000000 1.000000000
## pi 0.005128205 0.002564103 0.007692308 0.002564103 0.005128205 0.002564103
## 233 235 236 239 248 251
## fi 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## pi 0.002564103 0.002564103 0.002564103 0.002564103 0.002564103 0.002564103
## 255 262 267 269 270 279
## fi 1.000000000 1.000000000 1.000000000 1.000000000 2.000000000 1.000000000
## pi 0.002564103 0.002564103 0.002564103 0.002564103 0.005128205 0.002564103
## 297 299 330 341 342 369
## fi 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## pi 0.002564103 0.002564103 0.002564103 0.002564103 0.002564103 0.002564103
## 371 385 Totales
## fi 1.000000000 1.000000000 390
## pi 0.002564103 0.002564103 1
TABLA DE FRECUENCIAS COMPLETA:
INTERVALO STURGES
# Grouped frequency table for glucose: the number of classes follows Sturges'
# rule and the class intervals are closed on the right.
tabla_completa <- fdt(glucose, breaks = "Sturges", right = TRUE)
tabla_completa
## Class limits f rf rf(%) cf cf(%)
## (47.52,81.653] 109 0.28 27.95 109 27.95
## (81.653,115.786] 203 0.52 52.05 312 80.00
## (115.786,149.919] 28 0.07 7.18 340 87.18
## (149.919,184.052] 15 0.04 3.85 355 91.03
## (184.052,218.185] 10 0.03 2.56 365 93.59
## (218.185,252.318] 10 0.03 2.56 375 96.15
## (252.318,286.451] 7 0.02 1.79 382 97.95
## (286.451,320.584] 2 0.01 0.51 384 98.46
## (320.584,354.717] 3 0.01 0.77 387 99.23
## (354.717,388.85] 3 0.01 0.77 390 100.00
A partir de la tabla de frecuencias se observa que la mayor frecuencia está en la segunda clase, es decir, en el intervalo de 81.653 a 115.786 mg/dl. Esto indica que la mayoría de la población estudiada (el 80 % acumulado hasta esa clase) tiene un nivel de glucosa menor a 140 mg/dl, que se considera un nivel saludable.
# Histogram
# Density-scaled histogram of glucose (12 classes of width 30 between 40 and
# 400) with a kernel density estimate drawn on top of it.
hist_glu <- hist(
  glucose,
  breaks = seq(40, 400, 30),
  freq = FALSE,  # spelled out; the original `prob=T` relied on partial matching
  col = heat.colors(12),
  main = "Histograma de Glucosa",
  xlab = "Nivel de Glucosa (mg/dl)",
  ylab = "Densidad"  # the axis shows densities, not percentages
)
lines(density(glucose), col = "purple", lwd = 3)
# Frequency polygon
# Same classes as the histogram. The title keeps its line break, now written
# as an explicit "\n" instead of a string literal spanning two source lines.
simple.freqpoly(
  glucose,
  col = heat.colors(12), lwd = 3, breaks = seq(40, 400, 30),
  main = "POLÍGONO DE\nFRECUENCIAS",
  xlab = "Nivel de Glucosa (mg/dl)", ylab = "# de Pacientes"
)
Las dos primeras gráficas, el histograma y el polígono de frecuencias, respectivamente, confirman que la distribución es unimodal y que se concentra en niveles de glucosa saludables.
# Box plot
# Horizontal box plot of glucose (values on the x axis). The interactive
# `help("plot_ly")` call was dropped: it only opened the help browser and
# started a help server every time the script ran.
caja <- plot_ly(
  datas,
  x = ~glucose, type = "box", name = "Distribución Glucosa"
)
# Namespace-qualified because plotly's layout() masks graphics::layout().
plotly::layout(
  caja,
  title = "Caja y Brazos",
  colorway = "red",
  xaxis = list(title = "Glucosa MG/DL")
)
El diagrama de caja es consistente con la distribución unimodal observada y muestra la mediana desplazada hacia el lado izquierdo de la caja. Sin embargo, también expone de manera clara la existencia de múltiples valores atípicos (outliers), distribuidos de manera no uniforme pero con cierta constancia en valores mayores a 150 mg/dl, lo cual indica pacientes con alerta de prediabetes o diabetes tipo 2.
1.- MODA
# Mode of glucose.
mfv(glucose)
## [1] 85
2.- MEDIANA
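Utilizaremos la fórmula de la mediana para datos agrupados: $Me = L_i + \dfrac{\frac{n}{2} - F_{i-1}}{f_i}\, c$, donde $L_i$ es el límite inferior de la clase mediana, $n$ el número de observaciones, $F_{i-1}$ la frecuencia acumulada anterior a la clase mediana, $f_i$ la frecuencia de la clase mediana y $c$ la amplitud de la clase.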
# Sample median computed directly from the raw data.
median(glucose)
## [1] 90
# Median for grouped data, using the class of the Sturges table that contains
# the median, (81.653, 115.786]:
#   mediana = li + ((n / 2 - FA) / fmd) * amplitud
li <- 81.653                  # lower limit of the median class
nn <- length(glucose)         # number of observations
FA <- 109                     # cumulative frequency before the median class
fmd <- 203                    # frequency of the median class
# Class width. It used to be stored in a variable named `c`, which shadowed
# base::c(); it now has its own name.
amplitud <- 115.786 - 81.653
print("La mediana obtenida por la fórmula es:")
## [1] "La mediana obtenida por la fórmula es:"
mediana <- li + (((nn / 2) - FA) / fmd) * amplitud
mediana
## [1] 96.11329
3.- MEDIA
# Arithmetic mean of glucose.
mean(glucose)
## [1] 107.3385
4.- COEFICIENTE DE ASIMETRÍA
# Skewness coefficient of glucose.
# NOTE(review): skewness() comes from one of the attached packages, not base R;
# confirm which one provides it and which estimator it uses.
skewness(glucose)
## [1] 2.711121
ASIMETRÍA POSITIVA: el coeficiente de asimetría es mayor a 0, por lo que los datos tienden a acumularse del lado izquierdo y la cola de la distribución se extiende hacia la derecha.
5.- CURTOSIS
# Kurtosis of glucose.
# NOTE(review): confirm whether this kurtosis() reports excess kurtosis
# (normal = 0) or raw kurtosis (normal = 3) before interpreting the value.
kurtosis(glucose)
## [1] 7.905913
La curtosis indica una curva LEPTOCÚRTICA (muy picuda).
6.- COEFICIENTE DE VARIACIÓN
# Coefficient of variation: standard deviation relative to the mean.
sd(glucose)/mean(glucose)
## [1] 0.5012014
Para unir la gráfica interactiva con los puntos a analizar, se construyó la ojiva a partir de una tabla de frecuencias cuyos intervalos de clase están delimitados por los percentiles cada 5 %. Posteriormente se obtuvieron los valores de los percentiles solicitados (0.15, 0.60 y 0.95) y, finalmente, se colocó una sombra circular sobre el punto de la ojiva que corresponde a cada uno.
CUANTILES
# 15th, 60th and 95th percentiles of glucose.
quantile(glucose, probs=c(0.15,0.6, 0.95))
## 15% 60% 95%
## 76.0 94.0 234.1
OJIVA
# Ogive (cumulative relative frequency curve). The classes are bounded by the
# percentiles of glucose taken every 5%; the histogram is computed, not drawn.
pru <- hist(
  glucose,
  breaks = quantile(glucose, probs = seq(0, 1, 0.05)),
  plot = FALSE
)
hist_glu <- pru
n1 <- length(hist_glu$breaks)
# One row per class: lower limit, upper limit, absolute frequency, relative
# frequency, cumulative absolute frequency and cumulative relative frequency.
tab_glu_oji <- cbind(
  hist_glu$breaks[-n1], hist_glu$breaks[-1],
  hist_glu$counts,
  hist_glu$counts / sum(hist_glu$counts),
  cumsum(hist_glu$counts),
  cumsum(hist_glu$counts / sum(hist_glu$counts))
)
tab_glu_oji
## [,1] [,2] [,3] [,4] [,5] [,6]
## 0% 48.00 68.00 21 0.05384615 21 0.05384615
## 5% 68.00 74.00 26 0.06666667 47 0.12051282
## 10% 74.00 76.00 18 0.04615385 65 0.16666667
## 15% 76.00 78.00 16 0.04102564 81 0.20769231
## 20% 78.00 81.00 28 0.07179487 109 0.27948718
## 25% 81.00 82.00 10 0.02564103 119 0.30512821
## 30% 82.00 84.00 23 0.05897436 142 0.36410256
## 35% 84.00 85.00 18 0.04615385 160 0.41025641
## 40% 85.00 87.00 19 0.04871795 179 0.45897436
## 45% 87.00 90.00 25 0.06410256 204 0.52307692
## 50% 90.00 91.95 10 0.02564103 214 0.54871795
## 55% 91.95 94.00 25 0.06410256 239 0.61282051
## 60% 94.00 97.00 16 0.04102564 255 0.65384615
## 65% 97.00 101.30 18 0.04615385 273 0.70000000
## 70% 101.30 107.75 19 0.04871795 292 0.74871795
## 75% 107.75 115.40 20 0.05128205 312 0.80000000
## 80% 115.40 126.00 20 0.05128205 332 0.85128205
## 85% 126.00 174.20 19 0.04871795 351 0.90000000
## 90% 174.20 234.10 19 0.04871795 370 0.94871795
## 95% 234.10 385.00 20 0.05128205 390 1.00000000
colnames(tab_glu_oji) <- c("Linf", "Lsup", "f", "fr", "F", "Fr")
# Points of the ogive: upper class limit (column X1) against cumulative
# relative frequency (column X2).
h1 <- data.frame(cbind(tab_glu_oji[, 2], tab_glu_oji[, 6]))
# The formulas now read the columns of `h1` instead of reaching back into the
# matrix. "lines+markers" is requested explicitly: with mode = "lines" plotly
# warned that a marker was specified and added the markers on its own.
ojiva <- plot_ly(
  h1,
  x = ~X1, y = ~X2,
  marker = list(size = 15, color = "purple"),
  type = "scatter", mode = "lines+markers"
)

# Build one filled circle (an entry for the plotly layout `shapes` list) in
# data coordinates; extra named fields such as `name` are passed through `...`.
circulo_percentil <- function(x0, x1, y0, y1, color, ...) {
  list(
    type = "circle", ...,
    xref = "x", x0 = x0, x1 = x1,
    yref = "y", y0 = y0, y1 = y1,
    fillcolor = color, line = list(color = color),
    opacity = 0.7
  )
}

# Axis lines plus one circle over the region of each requested percentile
# (15%, 60% and 95%).
plotly::layout(
  ojiva,
  title = "Ojiva(Glucosa)",
  xaxis = list(title = "Límite Superior"),
  yaxis = list(title = "%"),
  shapes = list(
    # vertical line
    list(type = "line", x0 = 0, x1 = 0, y0 = 0, y1 = 1, yref = "paper"),
    # horizontal line
    list(type = "line", x0 = 0, x1 = 400, y0 = 1, y1 = 1, yref = "paper"),
    circulo_percentil(70, 80, 0.12, 0.18, "rgb(50, 20, 90)"),
    circulo_percentil(89, 99, 0.56, 0.63, "rgb(30, 100, 120)"),
    circulo_percentil(
      230, 240, 0.91, 0.97, "rgb(90, 200, 75)",
      name = "Percentil 95%"
    )
  )
)